
## Short messages for Figure 1B ## 
library(data.table)

# Load the tab-separated message file; `msgs` keeps the raw data.frame exactly
# as it was read.
# NOTE(review): read.table's default `quote` and `comment.char` settings apply
# here -- confirm they are appropriate for the free-text message column.
msgs <- read.table(
  "../data/Short_Messages_2010_2018.txt",
  header = TRUE,
  sep = "\t"
)

# Work on a data.table copy so the raw import is left untouched.
dat <- as.data.table(msgs)

# Dates are parsed as day, abbreviated month name, 4-digit year ("%d%b%Y").
dat[, date := as.Date(date, format = "%d%b%Y")]

# Constant 1 on every row, so that summing `ct` counts messages.
dat[, ct := 1]

# Sort (and key) the table by dist_to_event, then date, then overnight_message.
# Later grouped results therefore come out ordered by dist_to_event.
setkey(dat, dist_to_event, date, overnight_message)


## Sanity-check plot of the raw message count per event day (disabled) ##
# dat_ag <- dat[,list(num_msg <- sum(ct)), by='dist_to_event']

#plot(V1~dist_to_event, data=dat_ag, type="l", bty="n",las=1,
#     ylab="Number of StockTwits posts", xlab="Event Time (t=0 short selling spike)", ylim=c(0, 350000))
# abline(v=0, lty="dotted", col="steelblue3")


# Flag messages whose body contains "short" or "Short". One regex pass with
# "[Ss]hort" matches exactly the same messages as the two separate patterns,
# and `:=` adds the column by reference instead of copying the table.
# NOTE(review): this is a case-sensitive substring match, so all-caps "SHORT"
# is not counted while words such as "shortly" are -- confirm this is intended.
dat[, short := grepl("[Ss]hort", message_body)]

# For each value of dist_to_event: total number of messages (`num_msg`) and
# the number of messages flagged above (`short_msg`).
dat_ag <- dat[
  ,
  .(num_msg = sum(ct), short_msg = sum(short)),
  by = dist_to_event
]




# Fraction of messages flagged `short` at each dist_to_event value; the same
# formula drives both the line and the point overlay.
short_share <- I(short_msg / num_msg) ~ dist_to_event

# Connecting line, no box around the plot, horizontal axis labels.
plot(
  short_share,
  data = dat_ag,
  type = "l",
  bty = "n",
  las = 1,
  ylab = "Fraction tweets that mention 'short'",
  xlab = "Days relative to spike in short sales"
)

# Filled-diamond marker on every plotted value.
points(short_share, data = dat_ag, pch = 18)

# Dotted vertical reference line at dist_to_event = 0.
abline(v = 0, lty = "dotted", col = "steelblue3")
